rm(list=ls())

library(tidyverse)
library(modelsummary)
library(marginaleffects)
library(plm)
library(lme4)

# Environment prep
try(setwd("___your directory here___") , silent=TRUE)

dapils <- read.csv("data/kab_dapil_codes.csv")
to.merge <- read.csv("data/dapil_dpr.csv")
dapils <- left_join(dapils, to.merge, by = "kab_code") |>
  rename(dapil_code = dapil.x, dapil_name = dapil.y)

pres <- read.csv("data/pres.csv") |> select(-X)
dpr.kab <- read.csv("data/kab_dprRI_party.csv") |> 
  select(-jml_suara_partai) |>
  group_by(kab_id) %>% 
  mutate(totalvotes = sum(jml_suara_total)) %>% 
  ungroup() |>
  mutate(share = jml_suara_total/totalvotes) |> 
  select(-jml_suara_total,-totalvotes)
dpr.spread <- spread(dpr.kab, key=party, value=share)
enpp.sqs <- dpr.spread |>
  mutate(across(2:19, function(x) x^2))
enpp <- enpp.sqs |> 
  mutate(enpp = 1/rowSums(enpp.sqs[2:19])) |>
  select(kab_id, enpp)
dpr.to.bind <- left_join(dpr.spread,enpp)
biggest.party <- apply(dpr.spread[2:19],1,which.max)
#biggest.party <- colnames(dpr.spread[2:19])[apply(dpr.spread[2:19],1,which.max)]
dpr.to.bind <- cbind(dpr.to.bind,biggest.party) |>
  select(!(`1`:`24`)) |> rename(kab_code = kab_id, BiggestParty=3)

dpr <- left_join(pres, dpr.to.bind, by="kab_code")
dpr <- left_join(dpr, dapils, by="kab_code")
dpr <- dpr |> rename(prov_name = prov_name.x) |> select(-prov_name.y)
dpr$BiggestParty <- as.numeric(dpr$BiggestParty)

dpr <- dpr |>
  mutate(plotprov=fct_relevel(plotprov,c("Aceh","N. Sumatra","W. Sumatra",
                                         "Riau","Riau Islands","Bangka-Belitung",
                                         "Jambi","S. Sumatra","Bengkulu",
                                         "Lampung","Banten","DKI Jakarta",
                                         "West Java","Central Java","Yogyakarta",
                                         "East Java","Bali","W. Nusa Tenggara",
                                         "E. Nusa Tenggara","W. Kalimantan",
                                         "S. Kalimantan","C. Kalimantan",
                                         "E. Kalimantan","N. Kalimantan",
                                         "N. Sulawesi","Gorontalo","C. Sulawesi",
                                         "W. Sulawesi","S. Sulawesi","S.E. Sulawesi",
                                         "Maluku","N. Maluku","S.W. Papua",
                                         "W. Papua","Papua","C./S./H. Papua")))


parties <- table(dpr$BiggestParty,dpr$winner)
parties <- data.frame(cbind(rownames(parties),parties))
parties[,2] <- as.numeric(parties[,2])
parties[,3] <- as.numeric(parties[,3])
parties[,4] <- as.numeric(parties[,4])
colnames(parties) <- c("Party","Anies","Ganjar","Prabowo")
parties <- as_tibble(parties)
parties <- parties |>
  mutate(AniesPct = round(Anies/sum(Anies),3),
         GanjarPct = round(Ganjar/sum(Ganjar),3),
         PrabowoPct = round(Prabowo/sum(Prabowo),3))
write.csv(parties, "output/table2.csv")



# create dapil-level equivalent
dpr.dapil <- read.csv("data/kab_dprRI_party.csv") |> 
  select(-jml_suara_partai)
dpr.dapil <- left_join(dpr.dapil, dapils, by = join_by(kab_id == kab_code)) |>
  select(-dapil_name, -magnitude) 
dpr.dapil.sums <- dpr.dapil |> group_by(party,dapil_code) |> 
  summarize(partyvotes = sum(jml_suara_total)) |> arrange(dapil_code, party) |>
  ungroup() |>
  group_by(dapil_code) |> mutate(totalvotes = sum(partyvotes)) |>
  ungroup() |> 
  mutate(share = partyvotes/totalvotes) |> 
  select(-partyvotes,-totalvotes)
dpr.dapil.spread <- spread(dpr.dapil.sums, key=party, value=share)
enpp.sqs.dapil <- dpr.dapil.spread |>
  mutate(across(2:19, function(x) x^2))
enpp.dapil <- enpp.sqs.dapil |> 
  mutate(enpp = 1/rowSums(enpp.sqs.dapil[2:19])) |>
  select(dapil_code, enpp)
dapil.to.bind <- left_join(dpr.dapil.spread, enpp.dapil)
biggest.party.dapil <- apply(dpr.dapil.spread[2:19],1,which.max)
dapil.to.bind <- cbind(dapil.to.bind,biggest.party.dapil) |>
  select(!(`1`:`24`)) |> rename(BiggestParty=3)
pres.dapil <- left_join(pres,dapils, by = "kab_code") |>
  select(Anies, Prabowo, Ganjar, dapil_code) |>
  group_by(dapil_code) |> 
  summarize(aniesvotes = sum(Anies),
            prabowovotes = sum(Prabowo),
            ganjarvotes = sum(Ganjar)) |> arrange(dapil_code) |>
  mutate(
    winner = case_when(
      aniesvotes>ganjarvotes & aniesvotes > prabowovotes ~ "Anies",
      prabowovotes>ganjarvotes & prabowovotes>aniesvotes ~ "Prabowo",
      ganjarvotes>prabowovotes & ganjarvotes>aniesvotes ~ "Ganjar")
  )

dapil.merged <- left_join(pres.dapil, dapil.to.bind, by="dapil_code")
parties <- table(dapil.merged$BiggestParty,dapil.merged$winner)
parties <- data.frame(cbind(rownames(parties),parties))
parties[,2] <- as.numeric(parties[,2])
parties[,3] <- as.numeric(parties[,3])
colnames(parties) <- c("Party","Anies","Prabowo")
parties <- as_tibble(parties)
parties <- parties |>
  mutate(AniesPct = round(Anies/sum(Anies),3),
         PrabowoPct = round(Prabowo/sum(Prabowo),3))
write.csv(parties, "output/table2.csv")





p1 <- ggplot(dpr, aes(x=ethfrac, y=enpp)) + 
  geom_point(shape=1) + 
  geom_smooth(method = "lm", se = FALSE, col="black", linewidth=.5) + 
  theme_classic() + facet_wrap(~plotprov, ncol=6) +
  labs(y="ENP", x="Ethnic Fractionalization") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        panel.border = element_blank(),
        strip.text.x = element_text(size = 6),
        axis.text=element_text(size=5))

p2 <- ggplot(dpr, aes(x=ethfrac, y=enpp)) + 
  geom_point(shape=1) + 
  geom_smooth(method = "lm", col="black", linewidth=.5) + 
  labs(y="ENP", x="Ethnic Fractionalization") +
  theme_classic()

p3 <- ggplot(dpr, aes(x=magnitude, y=enpp)) + 
  geom_point(shape=1) +
  geom_smooth(method = "lm", se = FALSE, col="black", linewidth=.5) + 
  labs(y="ENP", x="District Magnitude") +
  theme_classic() + facet_wrap(~plotprov, ncol=6) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        panel.border = element_blank(),
        strip.text.x = element_text(size = 6),
        axis.text=element_text(size=5))

p4 <- ggplot(dpr, aes(x=magnitude, y=enpp)) + 
  geom_point(shape=1) +
  geom_smooth(method = "lm", col="black", linewidth=.5) + 
  labs(y="ENP", x="District Magnitude") +
  theme_classic()

to.save <- grid.arrange(p1,p3,p2,p4, ncol=2, heights=c(1.5,1))
ggsave(filename = "output/figure6.png", plot = to.save, height=7)
ggsave(filename = "output/figure6.pdf", plot = to.save, height=7)


mod.lm <- lm(enpp~magnitude + ethfrac + muslim + devindex + plotprov + largestgroup, 
             data=na.omit(dpr))
mod.re <- lmer(enpp~magnitude +  ethfrac + muslim + devindex + plotprov +  largestgroup +
                 (1 | dapil_code), data=na.omit(dpr))
mod.lm.p <- lm(enpp~pg_share + magnitude +  ethfrac + muslim + devindex + plotprov, 
             data=na.omit(dpr))
mod.lm.p.interact <- lm(enpp~pg_share*poly(ethfrac,2) + magnitude + muslim + devindex + plotprov + largestgroup, 
               data=na.omit(dpr))
mod.lmer.p.interact <- lmer(enpp~pg_share*poly(ethfrac,2) + magnitude + muslim + devindex + 
                              largestgroup + plotprov + (1 | dapil_code), data=na.omit(dpr))

toprint <- c('pg_share'='Prabowo-Gibran Vote Share',
             'ethfrac'='ELF',
             'poly(ethfrac, 2)1'='ELF',
             'poly(ethfrac, 2)2'='ELF Squared',
             'pg_share:poly(ethfrac, 2)1'='ELF * P-G Share',
             'pg_share:poly(ethfrac, 2)2'='ELF Squared * P-G Share',
             'muslim'='Muslim Population Share',
             'relfrac'='Religious Fractionalization',
             'magnitude'='District Magnitude')

modelsummary(list(mod.lm, mod.lm.p, mod.lm.p.interact, mod.lmer.p.interact),
                  coef_map=toprint, stars=TRUE, output="output/table3.docx")


p <- plot_predictions(mod.lm.p.interact, condition = list("ethfrac", "pg_share"="threenum"), vcov="HC0") +
  geom_rug(dpr, mapping = aes(x=ethfrac)) +
  labs(y="ENP",
       x="Ethnic Fractionalization", 
       color="Prabowo-Gibran\n2024 Vote Share",
       fill="Prabowo-Gibran\n2024 Vote Share") +
  theme_classic()
ggsave(filename = "output/figure7.png", plot = p, height=4)
ggsave(filename = "output/figure7.pdf", plot = p, height=4)
